Concepts taken from Advanced R.
# At the top level, `=` and `<-` are interchangeable: both bind a name to 1:10.
y = 1:10
y
## [1] 1 2 3 4 5 6 7 8 9 10
z <- 1:10
z
## [1] 1 2 3 4 5 6 7 8 9 10
# Inside a call, `=` only names an argument: 1:10 is passed to mean()'s `x`
# parameter and no variable `x` is created, as the error below shows.
mean(x = 1:10)
## [1] 5.5
x
## Error in eval(expr, envir, enclos): object 'x' not found
# `<-` inside a call is a real assignment: `x` is created in the calling
# environment and its value is then passed on to mean().
mean(x <- 1:10)
## [1] 5.5
x
## [1] 1 2 3 4 5 6 7 8 9 10
# system.time() has no parameter called `x`, so `x = ...` is rejected as an
# unused argument and nothing is timed.
system.time(x = lapply(1:10, function(x) {Sys.sleep(1); return(x)}))
## Error in system.time(x = lapply(1:10, function(x) {: unused argument (x = lapply(1:10, function(x) {
## Sys.sleep(1)
## return(x)
## }))
# With `<-` the assignment itself is the expression being timed; the ten
# one-second sleeps account for the ~10 s of elapsed time shown below.
system.time(x <- lapply(1:10, function(x) {Sys.sleep(1); return(x)}))
## user system elapsed
## 0.015 0.001 10.036
# Load the COVID-19 data with base R and inspect its structure.
# Note in the str() output that `Date` is read in as character, not as Date.
covid <- read.csv("../../Data/covid19.csv")
str(covid)
## 'data.frame': 18620 obs. of 8 variables:
## $ Country.Region: chr "Afghanistan" "Afghanistan" "Afghanistan" "Afghanistan" ...
## $ Province.State: chr "" "" "" "" ...
## $ Lat : num 33 33 33 33 33 33 33 33 33 33 ...
## $ Long : num 65 65 65 65 65 65 65 65 65 65 ...
## $ Date : chr "2020-01-22" "2020-01-23" "2020-01-24" "2020-01-25" ...
## $ Confirmed : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Recovered : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Deaths : int 0 0 0 0 0 0 0 0 0 0 ...
# Base R, single nested expression: the date(s) on which Switzerland's
# confirmed count equals its maximum. The country filter is spelled out twice:
# once to select rows, and once so max() only sees the Swiss rows.
covid[
  covid$Country.Region == "Switzerland" &
    covid$Confirmed == max(
      covid[covid$Country.Region == "Switzerland", "Confirmed"]
    ),
  "Date"
]
## [1] "2020-04-06"
# Same lookup with the native pipe: the steps read top to bottom.
covid |>
  subset(Country.Region == "Switzerland") |>  # keep the Swiss rows
  subset(Confirmed == max(Confirmed)) |>      # max() now sees Swiss rows only
  getElement("Date")                          # extract the column as a vector
## [1] "2020-04-06"
Same with tidyverse.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# dplyr version of the lookup. select() keeps a one-column data frame, which
# is why the result prints with a header instead of as a bare vector.
covid %>%
  filter(Country.Region == "Switzerland") %>%  # keep the Swiss rows
  filter(Confirmed == max(Confirmed)) %>%      # row(s) at the Swiss maximum
  select("Date")
## Date
## 1 2020-04-06
Pipe into variable
# Same pipeline, but the trailing `->` stores the result in `max_conf_date`
# instead of printing it. Writing `max_conf_date <- covid %>% ...` is the
# equivalent form with the name first.
covid %>%
  filter(Country.Region == "Switzerland") %>%
  filter(Confirmed == max(Confirmed)) %>%
  select("Date") -> max_conf_date

# Print the stored one-column data frame.
max_conf_date
## Date
## 1 2020-04-06
# Line plot of confirmed cases in Switzerland over time.
covid %>%
  filter(Country.Region == "Switzerland") %>%
  # `Date` is read in as character (see the str(covid) output). Parse it so the
  # x axis is a continuous date scale rather than one discrete tick per day;
  # this also makes the `group = 1` workaround for discrete x unnecessary.
  mutate(Date = as.Date(Date)) %>%
  ggplot() +
  geom_line(aes(Date, Confirmed, color = Country.Region)) +
  theme_classic(base_size = 15) +
  theme(axis.text.x = element_text(angle = 90))
Write large data frame
# Simulate 10,000,000 standard-normal draws, arrange them in 10 columns
# (1,000,000 rows) and write the result to disk as a CSV test file.
rnorm(n = 1e7) %>%
  matrix(ncol = 10) %>%
  as.data.frame() %>%
  write_csv(file = "../../Data/large_test_data.csv")
read.csv vs read_csv vs
data.table vs vroom
library(bench)
library(data.table)
library(vroom)
library(DT)
# Benchmark four CSV readers on the same file. Each expression is named so the
# result table shows a short label instead of the deparsed call. The
# `cur_data <-` assignments are kept so the last parsed data set is still
# available as `cur_data` afterwards.
# check = FALSE because the readers return different classes (data.frame,
# tibble, data.table), so their results are not expected to compare equal.
bench::mark(
  read.csv = cur_data <- read.csv("../../Data/large_test_data.csv"),
  read_csv = cur_data <- readr::read_csv("../../Data/large_test_data.csv"),
  fread = cur_data <- data.table::fread("../../Data/large_test_data.csv"),
  vroom = cur_data <- vroom::vroom("../../Data/large_test_data.csv"),
  check = FALSE
) %>%
  # bench::mark() stores every parsed data set, the raw timings, the memory
  # traces and the GC logs as list-columns. Sending those to the browser is
  # what made DT warn that the data is too big for client-side DataTables,
  # so keep only the summary columns.
  dplyr::select(-c(result, memory, time, gc)) %>%
  # Turn the expression column into plain text labels for display.
  dplyr::mutate(expression = as.character(expression)) %>%
  DT::datatable()
## Warning: Some expressions had a GC in every iteration; so filtering is disabled.